package com.shujia.spark.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demo of the join family on key/value RDDs: `join`, `leftOuterJoin` and `fullOuterJoin`.
  *
  * Reads `data/students.txt` and `data/score.txt`, keys every line by the text before its
  * first comma, joins the two datasets on that key, and prints the left-outer and
  * full-outer results to stdout.
  */
object Demo10Join {

  /** Placeholder emitted for the side of an outer join that has no matching record. */
  private val Missing: String = "默认"

  /**
    * Turns an RDD of comma-separated lines into a key/value RDD.
    *
    * The key is everything before the first comma (the whole line when there is no comma);
    * the value is the unmodified line. Unlike `split(",")(0)`, this does not throw on a line
    * consisting only of commas, for which `split` returns an empty array.
    *
    * @param lines raw text lines
    * @return pairs of (first field, original line)
    */
  private def keyByFirstField(lines: RDD[String]): RDD[(String, String)] =
    lines.map(line => (line.takeWhile(_ != ','), line))

  /**
    * Entry point: builds a local SparkContext, runs the join examples and always stops
    * the context afterwards.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf()
      .setAppName("map")
      .setMaster("local")

    // Spark context object
    val sc = new SparkContext(conf)

    // Stop the context even when a job fails, so its resources are released.
    try {
      // Read the student table
      val students: RDD[String] = sc.textFile("data/students.txt")
      // Read the score table
      val scores: RDD[String] = sc.textFile("data/score.txt")

      // Convert both RDDs to key/value form: first field as key, whole line as value
      val studentKVRDD: RDD[(String, String)] = keyByFirstField(students)
      val scoreKVRDD: RDD[(String, String)] = keyByFirstField(scores)

      /**
        * join: inner join by default.
        * Only keys present on both sides are kept.
        */
      val innerJoinRDD: RDD[(String, (String, String))] = studentKVRDD.join(scoreKVRDD)

      // Reshape the joined records: second field of the student line (bound as `name`)
      // paired with the third field of the score line (bound as `score`).
      val resultRDD: RDD[(String, String)] = innerJoinRDD.map {
        case (_, (studentInfo, scoreInfo)) =>
          val name: String = studentInfo.split(",")(1)
          val score: String = scoreInfo.split(",")(2)
          (name, score)
      }

      // Left disabled as in the original demo; without an action this RDD is never computed.
      //resultRDD.foreach(println)

      /**
        * leftOuterJoin: every record of the left side is kept; when the right side has
        * no match for the key, its slot is `None` (not null).
        */
      val leftOuterJoinRDD: RDD[(String, (String, Option[String]))] =
        studentKVRDD.leftOuterJoin(scoreKVRDD)

      // Matched rows print both lines; unmatched rows print the placeholder on the right.
      val leftOuterResultRDD: RDD[String] = leftOuterJoinRDD.map {
        case (_, (studentInfo, scoreInfo)) =>
          studentInfo + "\t" + scoreInfo.getOrElse(Missing)
      }

      leftOuterResultRDD.foreach(println)

      /**
        * fullOuterJoin: keys from either side are kept; the side without a match is `None`.
        */
      val fullOuterJoinRDD: RDD[(String, (Option[String], Option[String]))] =
        studentKVRDD.fullOuterJoin(scoreKVRDD)

      // getOrElse covers every Option combination, so the mapping is total.
      val fullOuterResultRDD: RDD[String] = fullOuterJoinRDD.map {
        case (_, (studentInfo, scoreInfo)) =>
          studentInfo.getOrElse(Missing) + "\t" + scoreInfo.getOrElse(Missing)
      }

      // An action is required: the original only declared the map, so nothing was ever run.
      fullOuterResultRDD.foreach(println)
    } finally {
      sc.stop()
    }

  }

}
